load("data/samsungData.rda") # load data
dim(samsungData)
## [1] 7352 563
dimdata = dim(samsungData)
dim(samsungData)
## [1] 7352 563
colnames(samsungData)
## [1] "tBodyAcc-mean()-X"
## [2] "tBodyAcc-mean()-Y"
## [3] "tBodyAcc-mean()-Z"
## [4] "tBodyAcc-std()-X"
## [5] "tBodyAcc-std()-Y"
## [6] "tBodyAcc-std()-Z"
## [7] "tBodyAcc-mad()-X"
## [8] "tBodyAcc-mad()-Y"
## [9] "tBodyAcc-mad()-Z"
## [10] "tBodyAcc-max()-X"
## [11] "tBodyAcc-max()-Y"
## [12] "tBodyAcc-max()-Z"
## [13] "tBodyAcc-min()-X"
## [14] "tBodyAcc-min()-Y"
## [15] "tBodyAcc-min()-Z"
## [16] "tBodyAcc-sma()"
## [17] "tBodyAcc-energy()-X"
## [18] "tBodyAcc-energy()-Y"
## [19] "tBodyAcc-energy()-Z"
## [20] "tBodyAcc-iqr()-X"
## [21] "tBodyAcc-iqr()-Y"
## [22] "tBodyAcc-iqr()-Z"
## [23] "tBodyAcc-entropy()-X"
## [24] "tBodyAcc-entropy()-Y"
## [25] "tBodyAcc-entropy()-Z"
## [26] "tBodyAcc-arCoeff()-X,1"
## [27] "tBodyAcc-arCoeff()-X,2"
## [28] "tBodyAcc-arCoeff()-X,3"
## [29] "tBodyAcc-arCoeff()-X,4"
## [30] "tBodyAcc-arCoeff()-Y,1"
## [31] "tBodyAcc-arCoeff()-Y,2"
## [32] "tBodyAcc-arCoeff()-Y,3"
## [33] "tBodyAcc-arCoeff()-Y,4"
## [34] "tBodyAcc-arCoeff()-Z,1"
## [35] "tBodyAcc-arCoeff()-Z,2"
## [36] "tBodyAcc-arCoeff()-Z,3"
## [37] "tBodyAcc-arCoeff()-Z,4"
## [38] "tBodyAcc-correlation()-X,Y"
## [39] "tBodyAcc-correlation()-X,Z"
## [40] "tBodyAcc-correlation()-Y,Z"
## [41] "tGravityAcc-mean()-X"
## [42] "tGravityAcc-mean()-Y"
## [43] "tGravityAcc-mean()-Z"
## [44] "tGravityAcc-std()-X"
## [45] "tGravityAcc-std()-Y"
## [46] "tGravityAcc-std()-Z"
## [47] "tGravityAcc-mad()-X"
## [48] "tGravityAcc-mad()-Y"
## [49] "tGravityAcc-mad()-Z"
## [50] "tGravityAcc-max()-X"
## [51] "tGravityAcc-max()-Y"
## [52] "tGravityAcc-max()-Z"
## [53] "tGravityAcc-min()-X"
## [54] "tGravityAcc-min()-Y"
## [55] "tGravityAcc-min()-Z"
## [56] "tGravityAcc-sma()"
## [57] "tGravityAcc-energy()-X"
## [58] "tGravityAcc-energy()-Y"
## [59] "tGravityAcc-energy()-Z"
## [60] "tGravityAcc-iqr()-X"
## [61] "tGravityAcc-iqr()-Y"
## [62] "tGravityAcc-iqr()-Z"
## [63] "tGravityAcc-entropy()-X"
## [64] "tGravityAcc-entropy()-Y"
## [65] "tGravityAcc-entropy()-Z"
## [66] "tGravityAcc-arCoeff()-X,1"
## [67] "tGravityAcc-arCoeff()-X,2"
## [68] "tGravityAcc-arCoeff()-X,3"
## [69] "tGravityAcc-arCoeff()-X,4"
## [70] "tGravityAcc-arCoeff()-Y,1"
## [71] "tGravityAcc-arCoeff()-Y,2"
## [72] "tGravityAcc-arCoeff()-Y,3"
## [73] "tGravityAcc-arCoeff()-Y,4"
## [74] "tGravityAcc-arCoeff()-Z,1"
## [75] "tGravityAcc-arCoeff()-Z,2"
## [76] "tGravityAcc-arCoeff()-Z,3"
## [77] "tGravityAcc-arCoeff()-Z,4"
## [78] "tGravityAcc-correlation()-X,Y"
## [79] "tGravityAcc-correlation()-X,Z"
## [80] "tGravityAcc-correlation()-Y,Z"
## [81] "tBodyAccJerk-mean()-X"
## [82] "tBodyAccJerk-mean()-Y"
## [83] "tBodyAccJerk-mean()-Z"
## [84] "tBodyAccJerk-std()-X"
## [85] "tBodyAccJerk-std()-Y"
## [86] "tBodyAccJerk-std()-Z"
## [87] "tBodyAccJerk-mad()-X"
## [88] "tBodyAccJerk-mad()-Y"
## [89] "tBodyAccJerk-mad()-Z"
## [90] "tBodyAccJerk-max()-X"
## [91] "tBodyAccJerk-max()-Y"
## [92] "tBodyAccJerk-max()-Z"
## [93] "tBodyAccJerk-min()-X"
## [94] "tBodyAccJerk-min()-Y"
## [95] "tBodyAccJerk-min()-Z"
## [96] "tBodyAccJerk-sma()"
## [97] "tBodyAccJerk-energy()-X"
## [98] "tBodyAccJerk-energy()-Y"
## [99] "tBodyAccJerk-energy()-Z"
## [100] "tBodyAccJerk-iqr()-X"
## [101] "tBodyAccJerk-iqr()-Y"
## [102] "tBodyAccJerk-iqr()-Z"
## [103] "tBodyAccJerk-entropy()-X"
## [104] "tBodyAccJerk-entropy()-Y"
## [105] "tBodyAccJerk-entropy()-Z"
## [106] "tBodyAccJerk-arCoeff()-X,1"
## [107] "tBodyAccJerk-arCoeff()-X,2"
## [108] "tBodyAccJerk-arCoeff()-X,3"
## [109] "tBodyAccJerk-arCoeff()-X,4"
## [110] "tBodyAccJerk-arCoeff()-Y,1"
## [111] "tBodyAccJerk-arCoeff()-Y,2"
## [112] "tBodyAccJerk-arCoeff()-Y,3"
## [113] "tBodyAccJerk-arCoeff()-Y,4"
## [114] "tBodyAccJerk-arCoeff()-Z,1"
## [115] "tBodyAccJerk-arCoeff()-Z,2"
## [116] "tBodyAccJerk-arCoeff()-Z,3"
## [117] "tBodyAccJerk-arCoeff()-Z,4"
## [118] "tBodyAccJerk-correlation()-X,Y"
## [119] "tBodyAccJerk-correlation()-X,Z"
## [120] "tBodyAccJerk-correlation()-Y,Z"
## [121] "tBodyGyro-mean()-X"
## [122] "tBodyGyro-mean()-Y"
## [123] "tBodyGyro-mean()-Z"
## [124] "tBodyGyro-std()-X"
## [125] "tBodyGyro-std()-Y"
## [126] "tBodyGyro-std()-Z"
## [127] "tBodyGyro-mad()-X"
## [128] "tBodyGyro-mad()-Y"
## [129] "tBodyGyro-mad()-Z"
## [130] "tBodyGyro-max()-X"
## [131] "tBodyGyro-max()-Y"
## [132] "tBodyGyro-max()-Z"
## [133] "tBodyGyro-min()-X"
## [134] "tBodyGyro-min()-Y"
## [135] "tBodyGyro-min()-Z"
## [136] "tBodyGyro-sma()"
## [137] "tBodyGyro-energy()-X"
## [138] "tBodyGyro-energy()-Y"
## [139] "tBodyGyro-energy()-Z"
## [140] "tBodyGyro-iqr()-X"
## [141] "tBodyGyro-iqr()-Y"
## [142] "tBodyGyro-iqr()-Z"
## [143] "tBodyGyro-entropy()-X"
## [144] "tBodyGyro-entropy()-Y"
## [145] "tBodyGyro-entropy()-Z"
## [146] "tBodyGyro-arCoeff()-X,1"
## [147] "tBodyGyro-arCoeff()-X,2"
## [148] "tBodyGyro-arCoeff()-X,3"
## [149] "tBodyGyro-arCoeff()-X,4"
## [150] "tBodyGyro-arCoeff()-Y,1"
## [151] "tBodyGyro-arCoeff()-Y,2"
## [152] "tBodyGyro-arCoeff()-Y,3"
## [153] "tBodyGyro-arCoeff()-Y,4"
## [154] "tBodyGyro-arCoeff()-Z,1"
## [155] "tBodyGyro-arCoeff()-Z,2"
## [156] "tBodyGyro-arCoeff()-Z,3"
## [157] "tBodyGyro-arCoeff()-Z,4"
## [158] "tBodyGyro-correlation()-X,Y"
## [159] "tBodyGyro-correlation()-X,Z"
## [160] "tBodyGyro-correlation()-Y,Z"
## [161] "tBodyGyroJerk-mean()-X"
## [162] "tBodyGyroJerk-mean()-Y"
## [163] "tBodyGyroJerk-mean()-Z"
## [164] "tBodyGyroJerk-std()-X"
## [165] "tBodyGyroJerk-std()-Y"
## [166] "tBodyGyroJerk-std()-Z"
## [167] "tBodyGyroJerk-mad()-X"
## [168] "tBodyGyroJerk-mad()-Y"
## [169] "tBodyGyroJerk-mad()-Z"
## [170] "tBodyGyroJerk-max()-X"
## [171] "tBodyGyroJerk-max()-Y"
## [172] "tBodyGyroJerk-max()-Z"
## [173] "tBodyGyroJerk-min()-X"
## [174] "tBodyGyroJerk-min()-Y"
## [175] "tBodyGyroJerk-min()-Z"
## [176] "tBodyGyroJerk-sma()"
## [177] "tBodyGyroJerk-energy()-X"
## [178] "tBodyGyroJerk-energy()-Y"
## [179] "tBodyGyroJerk-energy()-Z"
## [180] "tBodyGyroJerk-iqr()-X"
## [181] "tBodyGyroJerk-iqr()-Y"
## [182] "tBodyGyroJerk-iqr()-Z"
## [183] "tBodyGyroJerk-entropy()-X"
## [184] "tBodyGyroJerk-entropy()-Y"
## [185] "tBodyGyroJerk-entropy()-Z"
## [186] "tBodyGyroJerk-arCoeff()-X,1"
## [187] "tBodyGyroJerk-arCoeff()-X,2"
## [188] "tBodyGyroJerk-arCoeff()-X,3"
## [189] "tBodyGyroJerk-arCoeff()-X,4"
## [190] "tBodyGyroJerk-arCoeff()-Y,1"
## [191] "tBodyGyroJerk-arCoeff()-Y,2"
## [192] "tBodyGyroJerk-arCoeff()-Y,3"
## [193] "tBodyGyroJerk-arCoeff()-Y,4"
## [194] "tBodyGyroJerk-arCoeff()-Z,1"
## [195] "tBodyGyroJerk-arCoeff()-Z,2"
## [196] "tBodyGyroJerk-arCoeff()-Z,3"
## [197] "tBodyGyroJerk-arCoeff()-Z,4"
## [198] "tBodyGyroJerk-correlation()-X,Y"
## [199] "tBodyGyroJerk-correlation()-X,Z"
## [200] "tBodyGyroJerk-correlation()-Y,Z"
## [201] "tBodyAccMag-mean()"
## [202] "tBodyAccMag-std()"
## [203] "tBodyAccMag-mad()"
## [204] "tBodyAccMag-max()"
## [205] "tBodyAccMag-min()"
## [206] "tBodyAccMag-sma()"
## [207] "tBodyAccMag-energy()"
## [208] "tBodyAccMag-iqr()"
## [209] "tBodyAccMag-entropy()"
## [210] "tBodyAccMag-arCoeff()1"
## [211] "tBodyAccMag-arCoeff()2"
## [212] "tBodyAccMag-arCoeff()3"
## [213] "tBodyAccMag-arCoeff()4"
## [214] "tGravityAccMag-mean()"
## [215] "tGravityAccMag-std()"
## [216] "tGravityAccMag-mad()"
## [217] "tGravityAccMag-max()"
## [218] "tGravityAccMag-min()"
## [219] "tGravityAccMag-sma()"
## [220] "tGravityAccMag-energy()"
## [221] "tGravityAccMag-iqr()"
## [222] "tGravityAccMag-entropy()"
## [223] "tGravityAccMag-arCoeff()1"
## [224] "tGravityAccMag-arCoeff()2"
## [225] "tGravityAccMag-arCoeff()3"
## [226] "tGravityAccMag-arCoeff()4"
## [227] "tBodyAccJerkMag-mean()"
## [228] "tBodyAccJerkMag-std()"
## [229] "tBodyAccJerkMag-mad()"
## [230] "tBodyAccJerkMag-max()"
## [231] "tBodyAccJerkMag-min()"
## [232] "tBodyAccJerkMag-sma()"
## [233] "tBodyAccJerkMag-energy()"
## [234] "tBodyAccJerkMag-iqr()"
## [235] "tBodyAccJerkMag-entropy()"
## [236] "tBodyAccJerkMag-arCoeff()1"
## [237] "tBodyAccJerkMag-arCoeff()2"
## [238] "tBodyAccJerkMag-arCoeff()3"
## [239] "tBodyAccJerkMag-arCoeff()4"
## [240] "tBodyGyroMag-mean()"
## [241] "tBodyGyroMag-std()"
## [242] "tBodyGyroMag-mad()"
## [243] "tBodyGyroMag-max()"
## [244] "tBodyGyroMag-min()"
## [245] "tBodyGyroMag-sma()"
## [246] "tBodyGyroMag-energy()"
## [247] "tBodyGyroMag-iqr()"
## [248] "tBodyGyroMag-entropy()"
## [249] "tBodyGyroMag-arCoeff()1"
## [250] "tBodyGyroMag-arCoeff()2"
## [251] "tBodyGyroMag-arCoeff()3"
## [252] "tBodyGyroMag-arCoeff()4"
## [253] "tBodyGyroJerkMag-mean()"
## [254] "tBodyGyroJerkMag-std()"
## [255] "tBodyGyroJerkMag-mad()"
## [256] "tBodyGyroJerkMag-max()"
## [257] "tBodyGyroJerkMag-min()"
## [258] "tBodyGyroJerkMag-sma()"
## [259] "tBodyGyroJerkMag-energy()"
## [260] "tBodyGyroJerkMag-iqr()"
## [261] "tBodyGyroJerkMag-entropy()"
## [262] "tBodyGyroJerkMag-arCoeff()1"
## [263] "tBodyGyroJerkMag-arCoeff()2"
## [264] "tBodyGyroJerkMag-arCoeff()3"
## [265] "tBodyGyroJerkMag-arCoeff()4"
## [266] "fBodyAcc-mean()-X"
## [267] "fBodyAcc-mean()-Y"
## [268] "fBodyAcc-mean()-Z"
## [269] "fBodyAcc-std()-X"
## [270] "fBodyAcc-std()-Y"
## [271] "fBodyAcc-std()-Z"
## [272] "fBodyAcc-mad()-X"
## [273] "fBodyAcc-mad()-Y"
## [274] "fBodyAcc-mad()-Z"
## [275] "fBodyAcc-max()-X"
## [276] "fBodyAcc-max()-Y"
## [277] "fBodyAcc-max()-Z"
## [278] "fBodyAcc-min()-X"
## [279] "fBodyAcc-min()-Y"
## [280] "fBodyAcc-min()-Z"
## [281] "fBodyAcc-sma()"
## [282] "fBodyAcc-energy()-X"
## [283] "fBodyAcc-energy()-Y"
## [284] "fBodyAcc-energy()-Z"
## [285] "fBodyAcc-iqr()-X"
## [286] "fBodyAcc-iqr()-Y"
## [287] "fBodyAcc-iqr()-Z"
## [288] "fBodyAcc-entropy()-X"
## [289] "fBodyAcc-entropy()-Y"
## [290] "fBodyAcc-entropy()-Z"
## [291] "fBodyAcc-maxInds-X"
## [292] "fBodyAcc-maxInds-Y"
## [293] "fBodyAcc-maxInds-Z"
## [294] "fBodyAcc-meanFreq()-X"
## [295] "fBodyAcc-meanFreq()-Y"
## [296] "fBodyAcc-meanFreq()-Z"
## [297] "fBodyAcc-skewness()-X"
## [298] "fBodyAcc-kurtosis()-X"
## [299] "fBodyAcc-skewness()-Y"
## [300] "fBodyAcc-kurtosis()-Y"
## [301] "fBodyAcc-skewness()-Z"
## [302] "fBodyAcc-kurtosis()-Z"
## [303] "fBodyAcc-bandsEnergy()-1,8"
## [304] "fBodyAcc-bandsEnergy()-9,16"
## [305] "fBodyAcc-bandsEnergy()-17,24"
## [306] "fBodyAcc-bandsEnergy()-25,32"
## [307] "fBodyAcc-bandsEnergy()-33,40"
## [308] "fBodyAcc-bandsEnergy()-41,48"
## [309] "fBodyAcc-bandsEnergy()-49,56"
## [310] "fBodyAcc-bandsEnergy()-57,64"
## [311] "fBodyAcc-bandsEnergy()-1,16"
## [312] "fBodyAcc-bandsEnergy()-17,32"
## [313] "fBodyAcc-bandsEnergy()-33,48"
## [314] "fBodyAcc-bandsEnergy()-49,64"
## [315] "fBodyAcc-bandsEnergy()-1,24"
## [316] "fBodyAcc-bandsEnergy()-25,48"
## [317] "fBodyAcc-bandsEnergy()-1,8"
## [318] "fBodyAcc-bandsEnergy()-9,16"
## [319] "fBodyAcc-bandsEnergy()-17,24"
## [320] "fBodyAcc-bandsEnergy()-25,32"
## [321] "fBodyAcc-bandsEnergy()-33,40"
## [322] "fBodyAcc-bandsEnergy()-41,48"
## [323] "fBodyAcc-bandsEnergy()-49,56"
## [324] "fBodyAcc-bandsEnergy()-57,64"
## [325] "fBodyAcc-bandsEnergy()-1,16"
## [326] "fBodyAcc-bandsEnergy()-17,32"
## [327] "fBodyAcc-bandsEnergy()-33,48"
## [328] "fBodyAcc-bandsEnergy()-49,64"
## [329] "fBodyAcc-bandsEnergy()-1,24"
## [330] "fBodyAcc-bandsEnergy()-25,48"
## [331] "fBodyAcc-bandsEnergy()-1,8"
## [332] "fBodyAcc-bandsEnergy()-9,16"
## [333] "fBodyAcc-bandsEnergy()-17,24"
## [334] "fBodyAcc-bandsEnergy()-25,32"
## [335] "fBodyAcc-bandsEnergy()-33,40"
## [336] "fBodyAcc-bandsEnergy()-41,48"
## [337] "fBodyAcc-bandsEnergy()-49,56"
## [338] "fBodyAcc-bandsEnergy()-57,64"
## [339] "fBodyAcc-bandsEnergy()-1,16"
## [340] "fBodyAcc-bandsEnergy()-17,32"
## [341] "fBodyAcc-bandsEnergy()-33,48"
## [342] "fBodyAcc-bandsEnergy()-49,64"
## [343] "fBodyAcc-bandsEnergy()-1,24"
## [344] "fBodyAcc-bandsEnergy()-25,48"
## [345] "fBodyAccJerk-mean()-X"
## [346] "fBodyAccJerk-mean()-Y"
## [347] "fBodyAccJerk-mean()-Z"
## [348] "fBodyAccJerk-std()-X"
## [349] "fBodyAccJerk-std()-Y"
## [350] "fBodyAccJerk-std()-Z"
## [351] "fBodyAccJerk-mad()-X"
## [352] "fBodyAccJerk-mad()-Y"
## [353] "fBodyAccJerk-mad()-Z"
## [354] "fBodyAccJerk-max()-X"
## [355] "fBodyAccJerk-max()-Y"
## [356] "fBodyAccJerk-max()-Z"
## [357] "fBodyAccJerk-min()-X"
## [358] "fBodyAccJerk-min()-Y"
## [359] "fBodyAccJerk-min()-Z"
## [360] "fBodyAccJerk-sma()"
## [361] "fBodyAccJerk-energy()-X"
## [362] "fBodyAccJerk-energy()-Y"
## [363] "fBodyAccJerk-energy()-Z"
## [364] "fBodyAccJerk-iqr()-X"
## [365] "fBodyAccJerk-iqr()-Y"
## [366] "fBodyAccJerk-iqr()-Z"
## [367] "fBodyAccJerk-entropy()-X"
## [368] "fBodyAccJerk-entropy()-Y"
## [369] "fBodyAccJerk-entropy()-Z"
## [370] "fBodyAccJerk-maxInds-X"
## [371] "fBodyAccJerk-maxInds-Y"
## [372] "fBodyAccJerk-maxInds-Z"
## [373] "fBodyAccJerk-meanFreq()-X"
## [374] "fBodyAccJerk-meanFreq()-Y"
## [375] "fBodyAccJerk-meanFreq()-Z"
## [376] "fBodyAccJerk-skewness()-X"
## [377] "fBodyAccJerk-kurtosis()-X"
## [378] "fBodyAccJerk-skewness()-Y"
## [379] "fBodyAccJerk-kurtosis()-Y"
## [380] "fBodyAccJerk-skewness()-Z"
## [381] "fBodyAccJerk-kurtosis()-Z"
## [382] "fBodyAccJerk-bandsEnergy()-1,8"
## [383] "fBodyAccJerk-bandsEnergy()-9,16"
## [384] "fBodyAccJerk-bandsEnergy()-17,24"
## [385] "fBodyAccJerk-bandsEnergy()-25,32"
## [386] "fBodyAccJerk-bandsEnergy()-33,40"
## [387] "fBodyAccJerk-bandsEnergy()-41,48"
## [388] "fBodyAccJerk-bandsEnergy()-49,56"
## [389] "fBodyAccJerk-bandsEnergy()-57,64"
## [390] "fBodyAccJerk-bandsEnergy()-1,16"
## [391] "fBodyAccJerk-bandsEnergy()-17,32"
## [392] "fBodyAccJerk-bandsEnergy()-33,48"
## [393] "fBodyAccJerk-bandsEnergy()-49,64"
## [394] "fBodyAccJerk-bandsEnergy()-1,24"
## [395] "fBodyAccJerk-bandsEnergy()-25,48"
## [396] "fBodyAccJerk-bandsEnergy()-1,8"
## [397] "fBodyAccJerk-bandsEnergy()-9,16"
## [398] "fBodyAccJerk-bandsEnergy()-17,24"
## [399] "fBodyAccJerk-bandsEnergy()-25,32"
## [400] "fBodyAccJerk-bandsEnergy()-33,40"
## [401] "fBodyAccJerk-bandsEnergy()-41,48"
## [402] "fBodyAccJerk-bandsEnergy()-49,56"
## [403] "fBodyAccJerk-bandsEnergy()-57,64"
## [404] "fBodyAccJerk-bandsEnergy()-1,16"
## [405] "fBodyAccJerk-bandsEnergy()-17,32"
## [406] "fBodyAccJerk-bandsEnergy()-33,48"
## [407] "fBodyAccJerk-bandsEnergy()-49,64"
## [408] "fBodyAccJerk-bandsEnergy()-1,24"
## [409] "fBodyAccJerk-bandsEnergy()-25,48"
## [410] "fBodyAccJerk-bandsEnergy()-1,8"
## [411] "fBodyAccJerk-bandsEnergy()-9,16"
## [412] "fBodyAccJerk-bandsEnergy()-17,24"
## [413] "fBodyAccJerk-bandsEnergy()-25,32"
## [414] "fBodyAccJerk-bandsEnergy()-33,40"
## [415] "fBodyAccJerk-bandsEnergy()-41,48"
## [416] "fBodyAccJerk-bandsEnergy()-49,56"
## [417] "fBodyAccJerk-bandsEnergy()-57,64"
## [418] "fBodyAccJerk-bandsEnergy()-1,16"
## [419] "fBodyAccJerk-bandsEnergy()-17,32"
## [420] "fBodyAccJerk-bandsEnergy()-33,48"
## [421] "fBodyAccJerk-bandsEnergy()-49,64"
## [422] "fBodyAccJerk-bandsEnergy()-1,24"
## [423] "fBodyAccJerk-bandsEnergy()-25,48"
## [424] "fBodyGyro-mean()-X"
## [425] "fBodyGyro-mean()-Y"
## [426] "fBodyGyro-mean()-Z"
## [427] "fBodyGyro-std()-X"
## [428] "fBodyGyro-std()-Y"
## [429] "fBodyGyro-std()-Z"
## [430] "fBodyGyro-mad()-X"
## [431] "fBodyGyro-mad()-Y"
## [432] "fBodyGyro-mad()-Z"
## [433] "fBodyGyro-max()-X"
## [434] "fBodyGyro-max()-Y"
## [435] "fBodyGyro-max()-Z"
## [436] "fBodyGyro-min()-X"
## [437] "fBodyGyro-min()-Y"
## [438] "fBodyGyro-min()-Z"
## [439] "fBodyGyro-sma()"
## [440] "fBodyGyro-energy()-X"
## [441] "fBodyGyro-energy()-Y"
## [442] "fBodyGyro-energy()-Z"
## [443] "fBodyGyro-iqr()-X"
## [444] "fBodyGyro-iqr()-Y"
## [445] "fBodyGyro-iqr()-Z"
## [446] "fBodyGyro-entropy()-X"
## [447] "fBodyGyro-entropy()-Y"
## [448] "fBodyGyro-entropy()-Z"
## [449] "fBodyGyro-maxInds-X"
## [450] "fBodyGyro-maxInds-Y"
## [451] "fBodyGyro-maxInds-Z"
## [452] "fBodyGyro-meanFreq()-X"
## [453] "fBodyGyro-meanFreq()-Y"
## [454] "fBodyGyro-meanFreq()-Z"
## [455] "fBodyGyro-skewness()-X"
## [456] "fBodyGyro-kurtosis()-X"
## [457] "fBodyGyro-skewness()-Y"
## [458] "fBodyGyro-kurtosis()-Y"
## [459] "fBodyGyro-skewness()-Z"
## [460] "fBodyGyro-kurtosis()-Z"
## [461] "fBodyGyro-bandsEnergy()-1,8"
## [462] "fBodyGyro-bandsEnergy()-9,16"
## [463] "fBodyGyro-bandsEnergy()-17,24"
## [464] "fBodyGyro-bandsEnergy()-25,32"
## [465] "fBodyGyro-bandsEnergy()-33,40"
## [466] "fBodyGyro-bandsEnergy()-41,48"
## [467] "fBodyGyro-bandsEnergy()-49,56"
## [468] "fBodyGyro-bandsEnergy()-57,64"
## [469] "fBodyGyro-bandsEnergy()-1,16"
## [470] "fBodyGyro-bandsEnergy()-17,32"
## [471] "fBodyGyro-bandsEnergy()-33,48"
## [472] "fBodyGyro-bandsEnergy()-49,64"
## [473] "fBodyGyro-bandsEnergy()-1,24"
## [474] "fBodyGyro-bandsEnergy()-25,48"
## [475] "fBodyGyro-bandsEnergy()-1,8"
## [476] "fBodyGyro-bandsEnergy()-9,16"
## [477] "fBodyGyro-bandsEnergy()-17,24"
## [478] "fBodyGyro-bandsEnergy()-25,32"
## [479] "fBodyGyro-bandsEnergy()-33,40"
## [480] "fBodyGyro-bandsEnergy()-41,48"
## [481] "fBodyGyro-bandsEnergy()-49,56"
## [482] "fBodyGyro-bandsEnergy()-57,64"
## [483] "fBodyGyro-bandsEnergy()-1,16"
## [484] "fBodyGyro-bandsEnergy()-17,32"
## [485] "fBodyGyro-bandsEnergy()-33,48"
## [486] "fBodyGyro-bandsEnergy()-49,64"
## [487] "fBodyGyro-bandsEnergy()-1,24"
## [488] "fBodyGyro-bandsEnergy()-25,48"
## [489] "fBodyGyro-bandsEnergy()-1,8"
## [490] "fBodyGyro-bandsEnergy()-9,16"
## [491] "fBodyGyro-bandsEnergy()-17,24"
## [492] "fBodyGyro-bandsEnergy()-25,32"
## [493] "fBodyGyro-bandsEnergy()-33,40"
## [494] "fBodyGyro-bandsEnergy()-41,48"
## [495] "fBodyGyro-bandsEnergy()-49,56"
## [496] "fBodyGyro-bandsEnergy()-57,64"
## [497] "fBodyGyro-bandsEnergy()-1,16"
## [498] "fBodyGyro-bandsEnergy()-17,32"
## [499] "fBodyGyro-bandsEnergy()-33,48"
## [500] "fBodyGyro-bandsEnergy()-49,64"
## [501] "fBodyGyro-bandsEnergy()-1,24"
## [502] "fBodyGyro-bandsEnergy()-25,48"
## [503] "fBodyAccMag-mean()"
## [504] "fBodyAccMag-std()"
## [505] "fBodyAccMag-mad()"
## [506] "fBodyAccMag-max()"
## [507] "fBodyAccMag-min()"
## [508] "fBodyAccMag-sma()"
## [509] "fBodyAccMag-energy()"
## [510] "fBodyAccMag-iqr()"
## [511] "fBodyAccMag-entropy()"
## [512] "fBodyAccMag-maxInds"
## [513] "fBodyAccMag-meanFreq()"
## [514] "fBodyAccMag-skewness()"
## [515] "fBodyAccMag-kurtosis()"
## [516] "fBodyBodyAccJerkMag-mean()"
## [517] "fBodyBodyAccJerkMag-std()"
## [518] "fBodyBodyAccJerkMag-mad()"
## [519] "fBodyBodyAccJerkMag-max()"
## [520] "fBodyBodyAccJerkMag-min()"
## [521] "fBodyBodyAccJerkMag-sma()"
## [522] "fBodyBodyAccJerkMag-energy()"
## [523] "fBodyBodyAccJerkMag-iqr()"
## [524] "fBodyBodyAccJerkMag-entropy()"
## [525] "fBodyBodyAccJerkMag-maxInds"
## [526] "fBodyBodyAccJerkMag-meanFreq()"
## [527] "fBodyBodyAccJerkMag-skewness()"
## [528] "fBodyBodyAccJerkMag-kurtosis()"
## [529] "fBodyBodyGyroMag-mean()"
## [530] "fBodyBodyGyroMag-std()"
## [531] "fBodyBodyGyroMag-mad()"
## [532] "fBodyBodyGyroMag-max()"
## [533] "fBodyBodyGyroMag-min()"
## [534] "fBodyBodyGyroMag-sma()"
## [535] "fBodyBodyGyroMag-energy()"
## [536] "fBodyBodyGyroMag-iqr()"
## [537] "fBodyBodyGyroMag-entropy()"
## [538] "fBodyBodyGyroMag-maxInds"
## [539] "fBodyBodyGyroMag-meanFreq()"
## [540] "fBodyBodyGyroMag-skewness()"
## [541] "fBodyBodyGyroMag-kurtosis()"
## [542] "fBodyBodyGyroJerkMag-mean()"
## [543] "fBodyBodyGyroJerkMag-std()"
## [544] "fBodyBodyGyroJerkMag-mad()"
## [545] "fBodyBodyGyroJerkMag-max()"
## [546] "fBodyBodyGyroJerkMag-min()"
## [547] "fBodyBodyGyroJerkMag-sma()"
## [548] "fBodyBodyGyroJerkMag-energy()"
## [549] "fBodyBodyGyroJerkMag-iqr()"
## [550] "fBodyBodyGyroJerkMag-entropy()"
## [551] "fBodyBodyGyroJerkMag-maxInds"
## [552] "fBodyBodyGyroJerkMag-meanFreq()"
## [553] "fBodyBodyGyroJerkMag-skewness()"
## [554] "fBodyBodyGyroJerkMag-kurtosis()"
## [555] "angle(tBodyAccMean,gravity)"
## [556] "angle(tBodyAccJerkMean),gravityMean)"
## [557] "angle(tBodyGyroMean,gravityMean)"
## [558] "angle(tBodyGyroJerkMean,gravityMean)"
## [559] "angle(X,gravityMean)"
## [560] "angle(Y,gravityMean)"
## [561] "angle(Z,gravityMean)"
## [562] "subject"
## [563] "activity"
duplicated_index = which(duplicated(colnames(samsungData)))
duplicated_columns =unique(colnames(samsungData)[(duplicated(colnames(samsungData)))])
samsungData.new = samsungData[,!(duplicated(colnames(samsungData)))]
dim(samsungData.new)
## [1] 7352 479
## adding .index to each duplicated column
for (each in duplicated_columns){
ind = which(colnames(samsungData)==each)
colnames(samsungData)[ind]= unlist(lapply(1:length(ind), function(i) paste(colnames(samsungData)[ind[i]], i, sep=".") ))
}
head(colnames(samsungData)[duplicated_index])
## [1] "fBodyAcc-bandsEnergy()-1,8.2" "fBodyAcc-bandsEnergy()-9,16.2"
## [3] "fBodyAcc-bandsEnergy()-17,24.2" "fBodyAcc-bandsEnergy()-25,32.2"
## [5] "fBodyAcc-bandsEnergy()-33,40.2" "fBodyAcc-bandsEnergy()-41,48.2"
summary(factor(samsungData[, dimdata[2]]))
## laying sitting standing walk walkdown walkup
## 1407 1286 1374 1226 986 1073
summary(factor(samsungData[, dimdata[2]-1]))
## 1 3 5 6 7 8 11 14 15 16 17 19 21 22 23 25 26 27
## 347 341 302 325 308 281 316 323 328 366 368 360 408 321 372 409 392 376
## 28 29 30
## 382 344 383
barplot(summary(factor(samsungData[, dimdata[2]-1])), cex.names =0.6)
title("number of data points for each subject")
my_df = table(samsungData$subject, samsungData$activity)
par(cex.main = 1)
plot(my_df, main="")
title(main="distribution of class labels per subject", outer = FALSE)
barplot(t(as.matrix(my_df)), beside = TRUE, main = "number of datapoints for each activity per subject", col = c(3,4,6,7,3,1))
legend("topright", colnames(my_df), col = c(3,4,6,7,3,1), lty = 1, lwd = 2)
last_feature_index = dimdata[2]-2
for (i in seq(1,last_feature_index, 50)) {
if (i+50 < last_feature_index) {
end = i + 50
} else {
end =last_feature_index
}
boxplot(samsungData[,i:end], main=paste("[",i, ",",(i+50), "]"))
}
split_data = function (data, seed) {
set.seed(seed) # set seeed for reproducibility
subject_list = attributes(factor(data[,dim(data)[2]-1]))$levels
training_ratio = 0.8
n_train = floor(training_ratio*length(subject_list))
trainning_subjects = sample(subject_list, n_train, replace = FALSE)
trainning_indices = which( data[,"subject"]%in%trainning_subjects)
test_subjects = subject_list[!(subject_list%in%trainning_indices)]
test_indices = which(!data[,"subject"]%in%trainning_subjects)
write.table(data, file = "samsungData_fixed-duplicated-columns.csv", row.names = FALSE, col.names = TRUE, sep = "," )
write.table(data[trainning_indices,], file = "samsungData_fixed-duplicated-columns.train.csv", row.names = FALSE, col.names = TRUE, sep = "," )
write.table(data[test_indices,], file = "samsungData_fixed-duplicated-columns.test.csv", row.names = FALSE, col.names = TRUE, sep = "," )
}
split_data(data = samsungData, seed = 123)
# The following two commands remove any previously installed H2O packages for R.
if ("package:h2o" %in% search()) { detach("package:h2o", unload=TRUE) }
if ("h2o" %in% rownames(installed.packages())) { remove.packages("h2o") }
# Next, we download packages that H2O depends on.
if (! ("methods" %in% rownames(installed.packages()))) { install.packages("methods") }
if (! ("statmod" %in% rownames(installed.packages()))) { install.packages("statmod") }
if (! ("stats" %in% rownames(installed.packages()))) { install.packages("stats") }
if (! ("graphics" %in% rownames(installed.packages()))) { install.packages("graphics") }
if (! ("RCurl" %in% rownames(installed.packages()))) { install.packages("RCurl") }
if (! ("jsonlite" %in% rownames(installed.packages()))) { install.packages("jsonlite") }
if (! ("tools" %in% rownames(installed.packages()))) { install.packages("tools") }
if (! ("utils" %in% rownames(installed.packages()))) { install.packages("utils") }
# h2o_3.8.2.6
install.packages("h2o", type="source", repos=(c("https://h2o-release.s3.amazonaws.com/h2o/rel-turchin/6/R")))
## load modules and start h2o compute node
library(h2o)
## Loading required package: statmod
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
##
## sd, var
## The following objects are masked from 'package:base':
##
## &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
localH2O = h2o.init(ip = "localhost", startH2O = TRUE)
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 29 minutes 27 seconds
## H2O cluster version: 3.8.2.6
## H2O cluster name: H2O_started_from_R_tkhunkhe_qzr755
## H2O cluster total nodes: 1
## H2O cluster total memory: 1.52 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 2
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## R Version: R version 3.3.0 (2016-05-03)
# upload file to h2o
samsungData.hex = h2o.uploadFile(path = "samsungData_fixed-duplicated-columns.train.csv")
dim(samsungData.hex)
ncol = dim(samsungData.hex)[2]
x = colnames(samsungData.hex)[-((ncol-1):ncol)]
y = colnames(samsungData.hex)[ncol]
# classification with random forest, and get the top most important features used
list_models = read.table("model_id.txt", sep="\t", header = FALSE, col.names = c('model', 'model_id'), stringsAsFactors = FALSE)
rownames(list_models) = list_models[,1]
list_models = list_models[-1]
tryCatch ( {
model <<- h2o.getModel(list_models['full model',])}, # <<- save to global
error=function(e) {
model <<- h2o.randomForest(x, y, seed = 123, samsungData.hex, nfolds = 10)
write(paste("full model",model@model_id, sep="\t"), "model_id.txt", append = TRUE)} )
h2o.confusionMatrix(model)
## Confusion Matrix: vertical: actual; across: predicted
## laying sitting standing walk walkdown walkup Error Rate
## laying 1092 0 0 0 0 0 0.0000 = 0 / 1,092
## sitting 0 955 59 0 0 0 0.0582 = 59 / 1,014
## standing 0 21 1063 0 0 0 0.0194 = 21 / 1,084
## walk 0 1 2 942 5 5 0.0136 = 13 / 955
## walkdown 0 0 0 4 753 7 0.0144 = 11 / 764
## walkup 0 0 0 1 1 824 0.0024 = 2 / 826
## Totals 1092 977 1124 947 759 836 0.0185 = 106 / 5,735
1 - h2o.confusionMatrix(model)['Totals','Error']
## [1] 0.981517
# select top features
top5_important_feature = model@model$variable_importances$variable[1:5]
top4_important_feature = model@model$variable_importances$variable[1:4]
top3_important_feature = model@model$variable_importances$variable[1:3]
top2_important_feature = model@model$variable_importances$variable[1:2]
top1_important_feature = model@model$variable_importances$variable[1]
### Train models, starting from using top 3 importand features until 80% accuracy is acchieved
tryCatch( {
model.with.1.features<<- h2o.getModel(list_models['1-featured model',])
model.with.2.features<<- h2o.getModel(list_models['2-featured model',])
model.with.3.features<<- h2o.getModel(list_models['3-featured model',])
model.with.4.features<<- h2o.getModel(list_models['4-featured model',])
model.with.5.features<<- h2o.getModel(list_models['5-featured model',])
}, error = function(e) {
model.with.1.features <<- h2o.randomForest(top1_important_feature, y, seed = 123, samsungData.hex, nfolds = 10)
model.with.2.features <<- h2o.randomForest(top2_important_feature, y, seed = 123, samsungData.hex, nfolds = 10)
model.with.3.features <<- h2o.randomForest(top3_important_feature, y, seed = 123, samsungData.hex, nfolds = 10)
model.with.4.features <<- h2o.randomForest(top4_important_feature, y, seed = 123, samsungData.hex, nfolds = 10)
model.with.5.features <<- h2o.randomForest(top5_important_feature, y, seed = 123, samsungData.hex, nfolds = 10)
write(paste("1-featured model",model.with.1.features@model_id, sep="\t"), "model_id.txt", append = TRUE)
write(paste("2-featured model",model.with.2.features@model_id, sep="\t"), "model_id.txt", append = TRUE)
write(paste("3-featured model",model.with.3.features@model_id, sep="\t"), "model_id.txt", append = TRUE)
write(paste("4-featured model",model.with.4.features@model_id, sep="\t"), "model_id.txt", append = TRUE)
write(paste("5-featured model",model.with.5.features@model_id, sep="\t"), "model_id.txt", append = TRUE)
})
accuracy = c()
model_list = list( model.with.1.features,model.with.2.features,model.with.3.features,model.with.4.features,model.with.5.features)
for (i in 1:length(model_list)){
accuracy[i] = as.numeric(model_list[[i]]@model$cross_validation_metrics_summary['accuracy','mean'])
}
accuracy
## [1] 0.4983688 0.7921445 0.8808801 0.8851942 0.8941538
par(mfrow=c(1,1), cex.axis = 0.8)
plot(1:5, accuracy, type="l", xlab = "")
abline(h = 0.80, lty=2 )
title("Accuracy of random forest models trained with different number of features", cex.main=0.8, xlab = "number of features", ylab = "accuracy", outer = FALSE)
for (i in 1:length(accuracy)){
this.accuracy = accuracy[i]
if (this.accuracy >= 0.8) {
num_features_selected <<- i
break
}
}
num_features_selected
## [1] 3
selected_model = model_list[num_features_selected]
selected_features = top5_important_feature[1:num_features_selected]
h2o.confusionMatrix(selected_model)
## NULL
samsungData.test.hex = h2o.uploadFile(path = "samsungData_fixed-duplicated-columns.test.csv")
dim(samsungData.test.hex)
ncol = dim(samsungData.test.hex)[2]
x =selected_features
y = colnames(samsungData.test.hex)[ncol]
# classification of the test data with random forest
model.test = h2o.randomForest(x, y, seed = 123, samsungData.test.hex)
write(paste("test_model",model.test@model_id, sep="\t"), "model_id.txt", append = TRUE)
h2o.confusionMatrix(model.test)
## Confusion Matrix: vertical: actual; across: predicted
## laying sitting standing walk walkdown walkup Error Rate
## laying 315 0 0 0 0 0 0.0000 = 0 / 315
## sitting 0 263 5 1 3 0 0.0331 = 9 / 272
## standing 0 3 248 23 10 6 0.1448 = 42 / 290
## walk 0 1 26 234 7 3 0.1365 = 37 / 271
## walkdown 0 1 2 22 192 5 0.1351 = 30 / 222
## walkup 0 0 2 16 9 220 0.1093 = 27 / 247
## Totals 315 268 283 296 221 234 0.0897 = 145 / 1,617
test.accuracy = 1 - h2o.confusionMatrix(model.test)['Totals', 'Error']
test.accuracy
## [1] 0.9103278
test.accuracy = 1 - h2o.confusionMatrix(model.test)['Totals', 'Error']
test.accuracy
## [1] 0.9103278
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: